#!/bin/bash
#
# Launches DAR training (autoregressive/train/train.py) through torchrun.
# The active command below trains the DAR-B model; commented-out variants for
# the other model sizes follow it in this file.
#
# Run this script once on every participating node with these environment
# variables set (presumably exported by the cluster scheduler - confirm for
# your environment):
#   RANK         forwarded to torchrun as --node_rank
#   MASTER_ADDR  forwarded to torchrun as --master_addr
#   MASTER_PORT  forwarded to torchrun as --master_port

# Abort with a clear message if a rendezvous variable is missing, instead of
# handing torchrun an empty --node_rank= / --master_addr= / --master_port=.
: "${RANK:?RANK must be set (used as torchrun --node_rank)}"
: "${MASTER_ADDR:?MASTER_ADDR must be set (used as torchrun --master_addr)}"
: "${MASTER_PORT:?MASTER_PORT must be set (used as torchrun --master_port)}"

# DAR-B: 4 nodes x 8 processes per node.
# The expansions are quoted so a value containing whitespace or glob
# characters cannot be split into extra torchrun arguments.
torchrun --nnodes=4 --nproc_per_node=8 \
    --node_rank="$RANK" --master_addr="$MASTER_ADDR" --master_port="$MASTER_PORT" \
    autoregressive/train/train.py \
    --dataset dar \
    --code-path ./dataset/ibq_train_tencrop.jsonl \
    --vqgan-ckpt ./vqgan_ckpt/imagenet256_16384.ckpt \
    --cloud-save-path ./dar_b_ckpt \
    --results-dir ./dar_b_ckpt/results \
    --vocab-size 16384 \
    --image-size 256 \
    --gpt-model DAR-B \
    --pe-type mrope-4d \
    --image-order diag \
    --directions 4 \
    --condition \
    --rope-base 10000 \
    --epochs 40 \
    --warmup-epochs 10 \
    --lr 1e-3 \
    --end-lr 1e-5 \
    --beta1 0.9 \
    --beta2 0.96 \
    --weight-decay 0.05 \
    --global-batch-size 2048 \
    --num-workers 8 \
    --log-every 100 \
    --ckpt-every 50000


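# DAR-L (8 nodes x 8 processes per node).
# To train this model, uncomment the command below and comment out the DAR-B
# command above. Apart from the model name and output paths, it differs from
# DAR-B in --nnodes (8), --lr (4e-4) and --weight-decay (0.03).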
#torchrun --nnodes=8 --nproc_per_node=8 \
#    --node_rank=$RANK --master_addr=$MASTER_ADDR --master_port=$MASTER_PORT \
#    autoregressive/train/train.py \
#    --dataset dar \
#    --code-path ./dataset/ibq_train_tencrop.jsonl \
#    --vqgan-ckpt ./vqgan_ckpt/imagenet256_16384.ckpt \
#    --cloud-save-path ./dar_l_ckpt \
#    --results-dir ./dar_l_ckpt/results \
#    --vocab-size 16384 \
#    --image-size 256 \
#    --gpt-model DAR-L \
#    --pe-type mrope-4d \
#    --image-order diag \
#    --directions 4 \
#    --condition \
#    --rope-base 10000 \
#    --epochs 40 \
#    --warmup-epochs 10 \
#    --lr 4e-4 \
#    --end-lr 1e-5 \
#    --beta1 0.9 \
#    --beta2 0.96 \
#    --weight-decay 0.03 \
#    --global-batch-size 2048 \
#    --num-workers 8 \
#    --log-every 100 \
#    --ckpt-every 50000

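# DAR-XL (16 nodes x 8 processes per node).
# To train this model, uncomment the command below and comment out the DAR-B
# command above. Apart from the model name and output paths, it differs from
# DAR-B in --nnodes (16) and --lr (4e-4).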
#torchrun --nnodes=16 --nproc_per_node=8 \
#    --node_rank=$RANK --master_addr=$MASTER_ADDR --master_port=$MASTER_PORT \
#    autoregressive/train/train.py \
#    --dataset dar \
#    --code-path ./dataset/ibq_train_tencrop.jsonl \
#    --vqgan-ckpt ./vqgan_ckpt/imagenet256_16384.ckpt \
#    --cloud-save-path ./dar_xl_ckpt \
#    --results-dir ./dar_xl_ckpt/results \
#    --vocab-size 16384 \
#    --image-size 256 \
#    --gpt-model DAR-XL \
#    --pe-type mrope-4d \
#    --image-order diag \
#    --directions 4 \
#    --condition \
#    --rope-base 10000 \
#    --epochs 40 \
#    --warmup-epochs 10 \
#    --lr 4e-4 \
#    --end-lr 1e-5 \
#    --beta1 0.9 \
#    --beta2 0.96 \
#    --weight-decay 0.05 \
#    --global-batch-size 2048 \
#    --num-workers 8 \
#    --log-every 100 \
#    --ckpt-every 50000
